# knitr chunk defaults: show code (echo), suppress package startup messages
knitr::opts_chunk$set(echo = TRUE, message=FALSE)

Pilot data for ‘Online incidental statistical learning of audiovisual word sequences in adults - a registered report’

Task design and analysis is a collaborative effort between Kuppuraj, Paul Thompson, Mihaela Duta and Dorothy Bishop.

Analysis of data from task version 5, August 2017.

The sequence of triplets used in this task is as follows for each set:

1, A1, S1, B1; %Adjacent deterministic
2, A1, S1, B1;
3, C1, S2, D1; %Adjacent probabilistic
4, C1, S2, D2;
5,  E1, R, F1;%Non-adjacent deterministic. R is random
6,  E1, R, F1;
7, R, R, R; %Totally random
8, R, R, R;

This code is used in the Matlab program used to generate stimulus sequences, which is kuppuseqgen_2A2C2E2R_seqgen.m

This uses stimulus specifications from: ‘all_stimuli_routines_3cond_kup_final.xlsx’, which is a giant matrix listing all word stimuli and documenting where there are ‘conflicts’, such that two stimuli should not co-occur in the same array — either because they share the same first phoneme, or because of visual confusability.

The matlab program also checks that two triplets with the same initial two elements do not occur in succession.

Terminology when referring to the experiment:

Data analysis

Current version of analysis in R is ‘mixed effects regression discontinuity_V5.R’ Useful references for linear mixed effects modeling are: http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html

https://rstudio-pubs-static.s3.amazonaws.com/33653_57fc7b8e5d484c909b615d8633c01d51.html

https://stats.stackexchange.com/questions/242109/model-failed-to-converge-warning-in-lmer

Initial steps: may need to install packages.

#setwd("c:/Users/pthompson/Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5")
#directory where data are found - currently set to Mac
options(scipen=999) #avoid scientific format
#library(devtools)
#install_github(repo = "RDDtools", username = "MatthieuStigler", subdir = "RDDtools")

library(RDDtools) # regression discontinuity tools (GitHub-only; see install_github line above)
library(optimx) # extra optimisers, used via lmerControl in the models below
library(lme4) # linear mixed effects models
library(ggplot2)
library(doBy) # summaryBy, used to collapse trials to one value per set x condition
library(gridExtra)
library(tidyverse) # filter/select (dplyr) used in the RDD loop
library(RColorBrewer)
library(splines)
#substitute windows for quartz if nonmac
# On Windows, calls to quartz() open a native windows() graphics device instead
if(.Platform$OS.type=="windows") {
  quartz<-function() windows()
} 

Specifying basic task parameters

# Task structure: 10 blocks of 5 sets each; the trained sequences are
# broken during blocks 7-8 and restored afterwards.
breakblock <- c(7, 8)
nsets      <- 5
nblocks    <- 10

# Phase boundaries expressed as set indices
phase1.start <- 1
phase1.end   <- nsets * (breakblock[1] - 1)  # learning phase: sets 1-30
phase2.start <- phase1.end + 1
phase2.end   <- nsets * breakblock[2]        # break phase: sets 31-40
phase3.start <- phase2.end + 1
phase3.end   <- nsets * nblocks              # final phase: sets 41-50

phase1.range <- phase1.start:phase1.end
phase2.range <- phase2.start:phase2.end
phase3.range <- phase3.start:phase3.end

# Participant file stems and condition labels
namelist <- sprintf("pilot_%02d", 1:9)
nsubs    <- length(namelist)

# Long-format container: all participants' summary rows get stacked here
main.data <- data.frame(ID = factor(), SetInd = integer(),
                        Type = integer(), TargetRT = integer())
thistype <- c("rand", "Adj_D", "Adj_P", "Non_D")

# Per-participant regression coefficients (generously sized; re-initialised
# with the correct width before the RDD loop below)
lmsummarycoefs <- data.frame(matrix(NA, nrow = nsubs, ncol = nsubs * 25))

Now process each person’s data and add to main summary file

# Read each participant's raw csv, clean the RTs, collapse trials to one
# value per set x condition, and stack the result into main.data (long form).
for (i in 1:nsubs){
  
  myname=namelist[i]
  mycsv=paste("c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
    # NOTE(review): this second assignment unconditionally overwrites the
    # Windows path above with the Mac path - presumably whichever line does
    # not apply is meant to be commented out by hand; confirm before running.
    mycsv=paste("/Users/dorothybishop//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
   

  mydata= read.csv(mycsv)  # read csv file 
  #get rid of RTs of inaccurate responses:replace with NA. 
  Rwdata=mydata
  # NOTE(review): five fields (ID, SetInd, Type, TargetACC, TargetRT) are
  # referenced below, but only four columns are kept here - if TargetACC is
  # not among columns 1,10,12,26 the accuracy filter below is a silent no-op.
  # Verify the indices against the csv layout.
  rawdata=Rwdata[ ,c(1,10,12,26)]
  RTcutoff<-2000
  
  ##############
  # RT cleaning: drop inaccurate trials, drop extreme anticipations,
  # cap long RTs at the cutoff rather than dropping them
  rawdata$TargetRT[rawdata$TargetACC==0]<-NA
  rawdata$TargetRT[rawdata$TargetRT<(-199)]<-NA #include anticipations up to -200
  rawdata$TargetRT[rawdata$TargetRT>RTcutoff]<-RTcutoff #set long RTs to the RTcutoff value
  
  RWdata<-rawdata
  
  #rename the types 
  # Numeric triplet codes 1-8 collapse pairwise into the four conditions
  RWdata$Type[RWdata$Type==1]<- "Adj_D"
  RWdata$Type[RWdata$Type==2]<- "Adj_D"
  
  RWdata$Type[RWdata$Type==3]<- "Adj_P"
  RWdata$Type[RWdata$Type==4]<- "Adj_P"
  
  RWdata$Type[RWdata$Type==5]<- "Non_D"
  RWdata$Type[RWdata$Type==6]<- "Non_D"
  
  RWdata$Type[RWdata$Type==7]<- "rand"
  RWdata$Type[RWdata$Type==8]<- "rand"
  
  RWdata$Type<-as.factor(RWdata$Type)
  RWdata$Type<-factor(RWdata$Type,levels=c("rand", "Adj_D", "Adj_P", "Non_D"))
  #ensure random is the first in the list, as this will be baseline comparison in analysis
  
  #Create a new matrix that has summary data for this participant. 
  
  # One RT per set x condition: the minimum across the (usually 2) trials
  detaildata<- summaryBy(TargetRT ~ SetInd+Type,  data=RWdata,
  FUN=c(min), na.rm=TRUE)
  #NB only 2 points to consider, so now taking minimum, not median
  
  # Row 4 is an arbitrary row used only to fetch this participant's ID value
  detaildata$ID<-rep(RWdata$ID[4],length(detaildata[,1]))
  #Need to repeat the subject ID on each line
  
  names(detaildata)<-c("SetInd", "Type", "TargetRT", "ID") #column headings
  
  main.data<-rbind(main.data,detaildata) #add to main data in long form with participants stacked below each other
}

Regression discontinuity analysis for individual participants

The package RDDtools makes it easy to compare the slopes of two portions of data. We do this for phase 1 (learning) and phase 2 (break-sequence). The t-value for this comparison can be used as an index of learning. The outputs are written to a .csv file for inspection.

# Regression discontinuity per participant x condition: learning phase
# (sets 1-30) vs break phase (sets 31-40). For each of the four condition
# types we store the 4 RDD regression coefficients plus the t and p for the
# change in slope at the cutpoint: 6 columns per type, 24 in all.
# (FIX: removed a stray argument-less library() call here, which lists all
# installed packages rather than loading anything - leftover code.)
lmsummarycoefs <- data.frame(matrix(NA, nrow = nsubs, ncol = 24)) # initialise matrix
for (i in 1:nsubs){
  startcol <- -5 # set so the first +6 increment lands on column 1 (historical)
  for (mytype in 1:4){
    # This participant/condition, learning + break phases only (sets < 41)
    mytemp <- filter(main.data, ID == namelist[i], Type == thistype[mytype], SetInd < 41)
    # RDDtools: discontinuity at set 31, the first set of the break phase
    RDD.temp <- RDDdata(y = mytemp$TargetRT, x = mytemp$SetInd, cutpoint = 31)
    reg_para <- RDDreg_lm(RDDobject = RDD.temp, order = 1) # linear; increase order for polynomial fits
    startcol <- startcol + 6
    endcol <- startcol + 3
    lmsummarycoefs[i, startcol:endcol] <- reg_para$coefficients
    st <- summary(reg_para)[[4]]
    myt <- st[2, 3] # t-value corresponding to difference in slope for two phases
    lmsummarycoefs[i, endcol + 1] <- round(myt, 2)
    myp <- summary(reg_para)$coefficients[2, 4] # sig of slope diff for the two phases
    lmsummarycoefs[i, endcol + 2] <- round(myp, 4)
    colnames(lmsummarycoefs)[startcol:endcol] <- paste0(thistype[mytype], '.', names(reg_para$coefficients))
    colnames(lmsummarycoefs)[endcol + 1] <- paste0(thistype[mytype], '_t.diff')
    colnames(lmsummarycoefs)[endcol + 2] <- paste0(thistype[mytype], '_p.diff')
  }
}
#write.csv(lmsummarycoefs, file = paste0(datadir,"lm_discont_coeffs_linear.csv"))
# Keep just the t and p columns for quick inspection
shortbit <- select(lmsummarycoefs, 5, 6, 11, 12, 17, 18, 23, 24)
rownames(shortbit) <- namelist

shortbit
##          rand_t.diff rand_p.diff Adj_D_t.diff Adj_D_p.diff Adj_P_t.diff
## pilot_01       -0.38      0.7054         3.08       0.0040         1.39
## pilot_02       -1.81      0.0785         1.80       0.0796        -1.50
## pilot_03       -0.17      0.8680         3.28       0.0023         1.23
## pilot_04       -0.04      0.9654         8.23       0.0000         6.06
## pilot_05        0.67      0.5068         6.79       0.0000         5.08
## pilot_06       -0.97      0.3406        -0.86       0.3969         3.27
## pilot_07       -0.89      0.3777         4.74       0.0000         3.54
## pilot_08        1.87      0.0700         0.89       0.3814         1.35
## pilot_09       -0.32      0.7524         2.89       0.0065         4.01
##          Adj_P_p.diff Non_D_t.diff Non_D_p.diff
## pilot_01       0.1737         0.52       0.6058
## pilot_02       0.1421         0.04       0.9711
## pilot_03       0.2271         1.89       0.0670
## pilot_04       0.0000         5.11       0.0000
## pilot_05       0.0000         4.20       0.0002
## pilot_06       0.0024         0.02       0.9816
## pilot_07       0.0011        10.31       0.0000
## pilot_08       0.1866         0.29       0.7711
## pilot_09       0.0003         1.53       0.1346
#can view shortbit to see which participants show which effects
# Prepare main.data for modelling: ID as a factor, plus a log-RT column.
# The +200 offset keeps anticipatory responses (down to -200 ms) loggable.
main.data <- data.frame(main.data)
main.data$ID <- as.factor(main.data$ID)
main.data$log_TargetRT <- log(200 + main.data$TargetRT)

Plot overall data

#png(filename="c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/Grandavg.png") 
# Grand average: mean RT for each set x condition across all participants
summarytable<-summaryBy(TargetRT~SetInd+Type,data=main.data,FUN=c(mean),na.rm=TRUE)
ggplot(summarytable,aes(x = SetInd, y = TargetRT.mean,color=Type))  +
  geom_line()

 # dev.off()

Plot the individual data

This allows us to see how well the regression discontinuity value identifies those who seem to be learning. The vertical grey dashed lines denote the break-seq blocks. (NB I can’t currently get this to incorporate the plot in output except by writing to file then reading back)

#png(filename="c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/Indivdata.png",width = 1000, height = 300)
#quartz(width=12,height=3)
# One panel per participant; the vertical dashed lines at sets 30 and 40
# bracket the break-sequence phase
ggplot(main.data, aes(x = SetInd, y = TargetRT,color=Type)) + 
  geom_line(alpha=0.75) + 
  geom_vline(aes(xintercept = 30), color = 'grey', size = 1, linetype = 'dashed') + 
  geom_vline(aes(xintercept = 40), color = 'grey', size = 1, linetype = 'dashed') +
   theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
  theme(legend.position = "top",strip.text=element_text(size=14),axis.text=element_text(size=14),axis.title=element_text(size=14,face="bold"))

#dev.off()

Linear Mixed effect model - Phase 1 (learning phase only)

#create new variables
main.data$ID <- as.factor(main.data$ID)
#main.data$log_TargetRT<-log(main.data$TargetRT+200) #to allow for anticipatory responses
main.data <- data.frame(main.data)

#####################################################################################

# Phase indicator dummies: 1 if the set falls in that phase, else 0
main.data$phase1 <- ifelse(main.data$SetInd %in% phase1.range, 1, 0)
main.data$phase2 <- ifelse(main.data$SetInd %in% phase2.range, 1, 0) #block where seq broken
main.data$phase3 <- ifelse(main.data$SetInd %in% phase3.range, 1, 0) #block where seq restored

# Single 3-level phase factor; reordered so 'Break' is the reference level
main.data$p123 <- as.factor(main.data$phase1 + 2*main.data$phase2 + 3*main.data$phase3)
levels(main.data$p123) <- c('Learn','Break','Final')
main.data$p123 <- factor(main.data$p123, levels = c("Break","Learn","Final"))

# Per-phase slope variables (uncentred counters first, then mean-centred)
myseq <- seq(1, nblocks*nsets)
bp1 <- myseq[phase1.end] + 1  # first set of the break phase
bp2 <- myseq[phase2.end] + 1  # first set of the final phase
# Each function returns a within-phase counter, and 0 outside its phase,
# so that slopes increment across each phase separately
b1make <- function(x, bp1) ifelse(x < bp1, x, 0) #NB altered by DB from bp1-x
b2make <- function(x, bp1, bp2) ifelse(x >= bp1 & x < bp2, x - bp1+1, 0)
b3make <- function(x, bp2) ifelse(x < bp2, 0, x - bp2+1)

# Mean-centred versions added to main.data.
# (FIX: these are now computed BEFORE allb below, which reads b2/b3; in the
# previous version allb was built first and so read non-existent columns.)
main.data$b1 <- b1make(main.data$SetInd, bp1) - mean(b1make(main.data$SetInd, bp1))
main.data$b2 <- b2make(main.data$SetInd, bp1, bp2) - mean(b2make(main.data$SetInd, bp1, bp2))
# FIX: centre b3 on its own mean (was mistakenly centred on b2's mean)
main.data$b3 <- b3make(main.data$SetInd, bp2) - mean(b3make(main.data$SetInd, bp2))

# Combined slope variable: b1 during Learn, b2 during Break, b3 during Final.
# NOTE(review): the original initialised allb from main.data$SetInd_c, a
# column never created in this script; b1 is used as the Learn-phase value
# here - confirm against the intended SetInd_c definition.
main.data$allb <- main.data$b1
w <- which(main.data$p123 == 'Break')
main.data$allb[w] <- main.data$b2[w]
w2 <- which(main.data$p123 == 'Final')
main.data$allb[w2] <- main.data$b3[w2]

# Learning-phase-only subset for the phase-1 model
vshort.data <- main.data[which(main.data$p123 == 'Learn'),]

# Random slopes on the centred learning-phase index, by participant and by
# participant-within-condition; no random intercepts (0+ notation)
mod.3vc <- lmer(TargetRT ~ Type #nb will automatically do main effects as well
              +(0+b1|ID)+(0+b1|ID:Type), data = vshort.data,
              REML = TRUE,control = lmerControl(optimizer = "optimx", 
                                                calc.derivs = TRUE, optCtrl = list(method = "nlminb")))

saveRDS(mod.3vc, "mod.3vc.rds")
#saveRDS(mod.3vb, "mod.3vb.rds")
#saveRDS(mod.3vb, "mod.3vb.rds")
#This fits with no warning messages.
summary(mod.3vc)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type + (0 + b1 | ID) + (0 + b1 | ID:Type)
##    Data: vshort.data
## Control: 
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
## 
## REML criterion at convergence: 14085.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2957 -0.5579  0.0468  0.6194  4.2569 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  ID:Type  b1      60.46   7.776 
##  ID       b1     127.22  11.279 
##  Residual      25385.20 159.327 
## Number of obs: 1080, groups:  ID:Type, 36; ID, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   654.23      11.68   56.03
## TypeAdj_D    -243.46      16.41  -14.83
## TypeAdj_P    -182.67      16.41  -11.13
## TypeNon_D     -74.24      16.41   -4.52
## 
## Correlation of Fixed Effects:
##           (Intr) TypA_D TypA_P
## TypeAdj_D -0.703              
## TypeAdj_P -0.703  0.500       
## TypeNon_D -0.703  0.500  0.500
ranef(mod.3vc)
## $`ID:Type`
##                         b1
## pilot_01:rand   -0.6734562
## pilot_01:Adj_D   2.6008124
## pilot_01:Adj_P   1.6675079
## pilot_01:Non_D  -1.9574593
## pilot_02:rand   -3.2893863
## pilot_02:Adj_D   2.7328078
## pilot_02:Adj_P   3.1780465
## pilot_02:Non_D  -1.7294831
## pilot_03:rand    7.2011086
## pilot_03:Adj_D  -8.8658447
## pilot_03:Adj_P  -4.3289490
## pilot_03:Non_D   2.7443989
## pilot_04:rand   11.9325441
## pilot_04:Adj_D  -9.4900449
## pilot_04:Adj_P  -4.2592186
## pilot_04:Non_D  -7.8037263
## pilot_05:rand   10.0261229
## pilot_05:Adj_D  -9.6395486
## pilot_05:Adj_P  -5.1360250
## pilot_05:Non_D  -2.0129672
## pilot_06:rand   -3.7780837
## pilot_06:Adj_D   5.9965759
## pilot_06:Adj_P  -0.4057772
## pilot_06:Non_D  -1.5763888
## pilot_07:rand   12.9632696
## pilot_07:Adj_D  -3.6625555
## pilot_07:Adj_P   0.1172250
## pilot_07:Non_D -17.9932035
## pilot_08:rand   -4.1900310
## pilot_08:Adj_D   1.7704182
## pilot_08:Adj_P   3.7041056
## pilot_08:Non_D  -1.3017082
## pilot_09:rand    8.6710378
## pilot_09:Adj_D  -8.2184924
## pilot_09:Adj_P  -4.5116856
## pilot_09:Non_D   2.6166806
## 
## $ID
##                    b1
## pilot_01   3.44533393
## pilot_02   1.87686401
## pilot_03  -6.83696300
## pilot_04 -20.24279382
## pilot_05 -14.22909458
## pilot_06   0.49726423
## pilot_07 -18.04358282
## pilot_08  -0.03622373
## pilot_09  -3.03514134
# Residual diagnostics for mod.3vc: residuals vs fitted, then
# scale-location (sqrt |residual| vs fitted)
par(mfrow=c(1,2))
 plot(mod.3vc,type=c("p","smooth"))

 plot(mod.3vc,sqrt(abs(resid(.)))~fitted(.),type=c("p","smooth"),ylab=expression(sqrt(abs(resid))))

 # Pearson residuals against the centred learning-phase slope variable
 plot(mod.3vc,resid(.,type="pearson")~b1, type=c("p","smooth"))

#dev.off()

#quartz()
#png(filename="merplot3.png")

# Normal Q-Q plot and histogram of residuals
qqnorm(resid(mod.3vc))
qqline(resid(mod.3vc)) # shape due to using whole numbers 1:40.

hist(resid(mod.3vc),100)

plots by participant and Type

# Prediction grid covering every Type x b1 x ID combination in the data
newdat1<-expand.grid(Type=unique(vshort.data$Type),b1=unique(vshort.data$b1),ID=unique(vshort.data$ID))

  library(RColorBrewer)
  library(ggplot2)

  # Observed points with model-predicted lines overlaid, one panel per participant
  ggplot(vshort.data, aes(x = b1, y = TargetRT,color=Type)) + 
  geom_point(alpha=0.35) + 
  geom_line(data=newdat1,aes(y=predict(mod.3vc,newdata=newdat1)),size = .75)+
  theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
  theme(legend.position = "top",strip.text=element_text(size=12),axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))

Power for phase 1 LMM model: required N to achieve an adequate standard error <20 for each main effect of Type

library(lme4)
library(plyr)

  # Power check: simulate N participants from the fitted phase-1 model and
  # ask whether each Type main-effect SE stays below the criterion of 20
  N=50
  nsim<-1 
  
  expdat_rd<-expand.grid(Type=factor(1:4),b1=unique(vshort.data$b1),ID=factor(1:N))
  
  #Recode into familiar factors for Kuppu/DB hypotheses.
  
  expdat_rd$Type<-car::recode(expdat_rd$Type,"1='Non_D';2='Adj_D';3='Adj_P';4='rand'")
  
  expdat_rd$Type<-factor(expdat_rd$Type,levels=c("rand","Adj_D", "Adj_P", "Non_D"))
  

  #Extract parameters for simulating data. (artificially reduce the effects of setInd and Type to see if power is working. Are the estimates reasonable)

    # beta/theta/sigma are taken directly from the fitted pilot model mod.3vc
    newparams_rd <- list(
    beta = getME(mod.3vc,"beta"), #fixed effects (intercept, Block, TPPR)
    theta = getME(mod.3vc, "theta"),#Random slope
    sigma = getME(mod.3vc, "sigma"))
    
    
  # Simulate: re.form=~0 means fresh random effects are drawn for the
  # simulated participants rather than conditioning on the fitted ones
  
  ss <- simulate(~ Type+(0+b1|ID)+(0+b1|ID:Type), nsim = nsim, newdata = expdat_rd, newparams = newparams_rd, family=gaussian, re.form=~0, allow.new.levels=TRUE)
  

  #####################################################################
  
  
 
    
    expdat_rd$TargetRT <- ss[, 1]  # first (and only, since nsim=1) simulated response
  
         # Refit the same model to the simulated data
         new1<- lmer(TargetRT  ~ Type+(0+b1|ID)+(0+b1|ID:Type), data = expdat_rd, REML = TRUE,control = lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
         
  summary(new1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type + (0 + b1 | ID) + (0 + b1 | ID:Type)
##    Data: expdat_rd
## Control: 
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
## 
## REML criterion at convergence: 78373.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5506 -0.6627 -0.0066  0.6725  3.2753 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  ID:Type  b1      71.48   8.454 
##  ID       b1     157.87  12.564 
##  Residual      25112.36 158.469 
## Number of obs: 6000, groups:  ID:Type, 200; ID, 50
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  656.709      4.942  132.87
## TypeAdj_D   -233.627      6.953  -33.60
## TypeAdj_P   -186.484      6.953  -26.82
## TypeNon_D    -77.128      6.953  -11.09
## 
## Correlation of Fixed Effects:
##           (Intr) TypA_D TypA_P
## TypeAdj_D -0.703              
## TypeAdj_P -0.703  0.500       
## TypeNon_D -0.703  0.500  0.500
  # Adequacy criterion: no fixed-effect SE above 20.
  # FIX: use if/else rather than scalar ifelse() - ifelse() stripped
  # print()'s invisible return, so the message was echoed twice in the
  # knitted output.
  if (any(round(sqrt(diag(vcov(new1))), 3) > 20)) {
    print("SE: too large")
  } else {
    print("SE: OK")
  }
## [1] "SE: OK"
## [1] "SE: OK"

Linear mixed effects Regression discontinuity

The model has been extended to allow a regression discontinuity with random slopes and interactions between slope and Type

##########################################################################
# Re-establish the task parameters (duplicated from the first section so
# this analysis can be run stand-alone).
breakblock <- c(7, 8)
nsets      <- 5
nblocks    <- 10

phase1.start <- 1
phase1.end   <- nsets * (breakblock[1] - 1)  # learning: sets 1-30
phase2.start <- phase1.end + 1
phase2.end   <- nsets * breakblock[2]        # break: sets 31-40
phase3.start <- phase2.end + 1
phase3.end   <- nsets * nblocks              # final: sets 41-50

phase1.range <- phase1.start:phase1.end
phase2.range <- phase2.start:phase2.end
phase3.range <- phase3.start:phase3.end

# On Windows, make quartz() open a native windows() graphics device
if (.Platform$OS.type == "windows") {
  quartz <- function() windows()
}
##########################################################################

# Participant file stems, condition labels, and the long-format container
namelist <- sprintf("pilot_%02d", 1:9)
nsubs    <- length(namelist)
thistype <- c("rand", "Adj_D", "Adj_P", "Non_D")
main.data <- data.frame(ID = factor(), SetInd = integer(),
                        Type = integer(), TargetRT = integer())

##########################################################################

# Read and clean each participant's csv, then stack set x condition minima
# into main.data (same processing as the first analysis section).
for (i in 1:nsubs){

  myname <- namelist[i]
  mycsv <- paste("c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
  # NOTE(review): unconditionally overwrites the Windows path above with the
  # Mac path - comment out whichever line does not apply on this machine.
    mycsv <- paste("/Users/dorothybishop//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
  mydata <- read.csv(mycsv)  # read csv file 
  #get rid of RTs of inaccurate responses and any RT greater than 3000 ms:replace with NA. 
  Rwdata <- mydata
  # NOTE(review): five fields (ID, SetInd, Type, TargetACC, TargetRT) are
  # referenced below but only four columns are kept here - verify the
  # column indices against the csv layout.
  rawdata <- Rwdata[ ,c(1,10,12,26)]
  #in the original analysis the following section will be removed 
  
  ####### Cap Type==19 trials at 2999 ms so they are not eliminated by the
  # cleaning below and an RT can still be extracted.
  rawdata$TargetRT[rawdata$Type==19 & rawdata$TargetRT>3000] <- 2999
  ##############
  rawdata$TargetRT[rawdata$TargetACC==0] <- NA
  # FIX: this previously read 'rawdata$TargetRT<-199' inside the subscript,
  # which R parses as an ASSIGNMENT (TargetRT <- 199), silently overwriting
  # the whole column with 199. The parentheses make it the intended
  # comparison against -199 (matching the first analysis section).
  rawdata$TargetRT[rawdata$TargetRT < (-199)] <- NA #include anticipations up to -200
  rawdata$TargetRT[rawdata$TargetRT>2000] <- 2000 
  
  RWdata <- rawdata
  
  #rename the types so median can be taken for each block 
  RWdata$Type[RWdata$Type==1]<- "Adj_D"
  RWdata$Type[RWdata$Type==2]<- "Adj_D"
  
  RWdata$Type[RWdata$Type==3]<- "Adj_P"
  RWdata$Type[RWdata$Type==4]<- "Adj_P"

  RWdata$Type[RWdata$Type==5]<- "Non_D"
  RWdata$Type[RWdata$Type==6]<- "Non_D"

  RWdata$Type[RWdata$Type==7]<- "rand"
  RWdata$Type[RWdata$Type==8]<- "rand"
  
  #RWdata$ID<-substring(RWdata$ID,1,2)
  RWdata$Type <- as.factor(RWdata$Type)
  RWdata$Type <- factor(RWdata$Type,levels=c("rand", "Adj_D", "Adj_P", "Non_D"))
  
  # One RT per set x condition: the minimum across the trials in the set
  detaildata <- summaryBy(TargetRT ~ SetInd+Type,  data=RWdata,
                         FUN=c(min), na.rm=TRUE)
  #NB only 2 points to consider, so now taking minimum, not median
  # ah, but NB for Adj_Prob there are 4 points...
  
  # Row 4 is an arbitrary row used only to fetch this participant's ID value
  detaildata$ID <- rep(RWdata$ID[4],length(detaildata[,1]))
  
  names(detaildata) <- c("SetInd", "Type", "TargetRT", "ID")
  
 
  main.data <- rbind(main.data,detaildata) #add to main data file in long form
  
}

# Prepare main.data for the two-phase regression discontinuity model
main.data$ID <- as.factor(main.data$ID)
#main.data$log_TargetRT<-log(main.data$TargetRT+200) #to allow for anticipatory responses
main.data<-data.frame(main.data)


#create new variables
# Binary phase indicator: 1 = learning phase (sets 1-30), 0 = otherwise
main.data$phase<-ifelse(main.data$SetInd %in% phase1.range,1,0)

# Relabel: level "0" -> 'Break', level "1" -> 'Learn'.
# NOTE(review): sets > 40 also get phase 0 ('Break') here; this is harmless
# only because the model below is fitted to twophase.data (SetInd <= 40).
main.data$phase<-as.factor(main.data$phase)
levels(main.data$phase)<-c('Break','Learn')


#redo without centring at this point
myseq<-seq(1,nblocks*nsets)
bp1<-myseq[phase1.end]+1 # first set of the break phase

#functions to determine if value x is in phase1, phase2 or phase3
#So this ignores values for sets not in this phase (sets to zero), but has all other phases scaled
#so that they show increment across phase
# NOTE: unlike the earlier section, b1 here counts DOWN to the breakpoint
# (bp1 - x), so b1 = 0 at and after the discontinuity
b1make <- function(x, bp1) ifelse(x < bp1,  bp1-x, 0) 
b2make <- function(x, bp1) ifelse(x >= bp1, x - bp1+1, 0)

main.data$b1<-b1make(main.data$SetInd,bp1)
main.data$b2<-b2make(main.data$SetInd,bp1)

##########################################################################################  

# Learning + break phases only (the final, restored phase is excluded)
twophase.data<-main.data[main.data$SetInd<=40,]

# Fixed effects: Type x phase step at the discontinuity plus Type-specific
# b2 slopes; random b1 slopes by participant and participant-within-Type.
# (The stray '+ +' in the formula is a harmless unary plus.)
mod.new <- lmer(TargetRT ~ Type*phase + b2*Type+
              +(0+b1|ID:Type)+(0+b1|ID), data = twophase.data,
              REML = TRUE,control = lmerControl(optimizer = "optimx", 
                                                calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
##########################################################################################  

summary(mod.new)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type * phase + b2 * Type + +(0 + b1 | ID:Type) + (0 +  
##     b1 | ID)
##    Data: twophase.data
## Control: 
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
## 
## REML criterion at convergence: 18848.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3386 -0.5490  0.0427  0.6136  4.4484 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  ID:Type  b1      22.55   4.749 
##  ID       b1      55.67   7.462 
##  Residual      28260.16 168.108 
## Number of obs: 1440, groups:  ID:Type, 36; ID, 9
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)           612.31852   38.27978  15.996
## TypeAdj_D             -66.99259   54.13578  -1.237
## TypeAdj_P             -23.28148   54.13578  -0.430
## TypeNon_D               4.68148   54.13578   0.086
## phaseLearn            -15.65979   42.56128  -0.368
## b2                     -1.34478    6.16935  -0.218
## TypeAdj_D:phaseLearn -299.51708   59.75815  -5.012
## TypeAdj_P:phaseLearn -257.12820   59.75815  -4.303
## TypeNon_D:phaseLearn -214.75811   59.75815  -3.594
## TypeAdj_D:b2            5.80875    8.72477   0.666
## TypeAdj_P:b2            1.25522    8.72477   0.144
## TypeNon_D:b2            0.04377    8.72477   0.005
## 
## Correlation of Fixed Effects:
##             (Intr) TypA_D TypA_P TypN_D phsLrn b2     TA_D:L TA_P:L TN_D:L
## TypeAdj_D   -0.707                                                        
## TypeAdj_P   -0.707  0.500                                                 
## TypeNon_D   -0.707  0.500  0.500                                          
## phaseLearn  -0.899  0.636  0.636  0.636                                   
## b2          -0.886  0.627  0.627  0.627  0.797                            
## TypAdj_D:pL  0.641 -0.906 -0.453 -0.453 -0.702 -0.568                     
## TypAdj_P:pL  0.641 -0.453 -0.906 -0.453 -0.702 -0.568  0.500              
## TypNn_D:phL  0.641 -0.453 -0.453 -0.906 -0.702 -0.568  0.500  0.500       
## TypAdj_D:b2  0.627 -0.886 -0.443 -0.443 -0.564 -0.707  0.803  0.402  0.402
## TypAdj_P:b2  0.627 -0.443 -0.886 -0.443 -0.564 -0.707  0.402  0.803  0.402
## TypeNn_D:b2  0.627 -0.443 -0.443 -0.886 -0.564 -0.707  0.402  0.402  0.803
##             TA_D:2 TA_P:2
## TypeAdj_D                
## TypeAdj_P                
## TypeNon_D                
## phaseLearn               
## b2                       
## TypAdj_D:pL              
## TypAdj_P:pL              
## TypNn_D:phL              
## TypAdj_D:b2              
## TypAdj_P:b2  0.500       
## TypeNn_D:b2  0.500  0.500
ranef(mod.new)
## $`ID:Type`
##                         b1
## pilot_01:rand  -4.61489713
## pilot_01:Adj_D  6.95328761
## pilot_01:Adj_P  0.75824069
## pilot_01:Non_D  1.59650517
## pilot_02:rand  -5.18904347
## pilot_02:Adj_D  5.36569239
## pilot_02:Adj_P  1.95828364
## pilot_02:Non_D  1.68765647
## pilot_03:rand  -0.99623407
## pilot_03:Adj_D  4.68485109
## pilot_03:Adj_P -2.73180705
## pilot_03:Non_D  1.35179123
## pilot_04:rand  -3.62296921
## pilot_04:Adj_D  2.05391348
## pilot_04:Adj_P  0.23821344
## pilot_04:Non_D  2.42985991
## pilot_05:rand  -0.50401251
## pilot_05:Adj_D -2.89764584
## pilot_05:Adj_P -1.93261844
## pilot_05:Non_D  6.35905275
## pilot_06:rand  -6.35259815
## pilot_06:Adj_D  6.84156878
## pilot_06:Adj_P  1.14316038
## pilot_06:Non_D  1.69341561
## pilot_07:rand  -0.33981282
## pilot_07:Adj_D -3.75950245
## pilot_07:Adj_P  4.28323087
## pilot_07:Non_D -0.03530841
## pilot_08:rand  -6.41491353
## pilot_08:Adj_D  5.40411196
## pilot_08:Adj_P  3.49418321
## pilot_08:Non_D  1.28276219
## pilot_09:rand  -0.39248567
## pilot_09:Adj_D -6.11923772
## pilot_09:Adj_P  1.57637161
## pilot_09:Non_D  6.92920910
## 
## $ID
##                  b1
## pilot_01 11.5848714
## pilot_02  9.4359505
## pilot_03  5.6987153
## pilot_04  2.7128932
## pilot_05  2.5296298
## pilot_06  8.2090157
## pilot_07  0.3668325
## pilot_08  9.2966172
## pilot_09  4.9217792
  #regression diagnostics
 #quartz()
#  png(filename="merplot2.png")
# Residuals vs fitted and scale-location plots for mod.new
par(mfrow=c(1,2))
plot(mod.new,type=c("p","smooth"))

plot(mod.new,sqrt(abs(resid(.)))~fitted(.),
                  type=c("p","smooth"),ylab=expression(sqrt(abs(resid))))

#plot(mod.new,resid(.,type="pearson")~twophase.data$SetInd, type=c("p","smooth"))
#dev.off()

#quartz()
#png(filename="merplot3.png")
# Normal Q-Q plot and histogram of residuals
qqnorm(resid(mod.new))
qqline(resid(mod.new)) # shape due to using whole numbers 1:40.

hist(resid(mod.new),100)

#dev.off()
###########################################################################################

# Prediction grids split at the discontinuity so fitted lines are not
# joined across the break (sets 1-30 vs sets 31-40)
startdat<-select(twophase.data,ID,Type,SetInd,phase,b1,b2)
startdat$SetInd<-as.integer(startdat$SetInd)
newdat1d<-startdat[startdat$SetInd<31,]
newdat2d<-startdat[startdat$SetInd>30,]
newdat2d<-newdat2d[newdat2d$SetInd<41,]

 #plots for lmer


 # Observed RTs with the two-segment model predictions, one panel per
 # participant; the dashed line marks the discontinuity at bp1
 ggplot(twophase.data, aes(x = SetInd, y = TargetRT,color=Type)) + 
   geom_point(alpha=0.35) + 
   geom_vline(aes(xintercept = bp1), color = 'grey', size = 1, linetype = 'dashed') + 
   geom_line(data=newdat1d,aes(y=predict(mod.new,newdata=newdat1d)),size = .75)+
   geom_line(data=newdat2d,aes(y=predict(mod.new,newdata=newdat2d)),size = .75)+
   theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
   theme(legend.position = "top",strip.text=element_text(size=12),axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))

Power for two phase model: required N to achieve an adequate standard error <30 for each interaction of Type and phase.

library(lme4)
library(plyr)

  # Power check: simulate N participants from the fitted two-phase model and
  # ask whether the Type x phase interaction SEs stay below the criterion of 30
  N=40
  nsim<-1 
  
  expdat_rd<-expand.grid(Type=factor(1:4),setInd=1:40,ID=factor(1:N))
  
  #Recode into familiar factors for Kuppu/DB hypotheses.
  
  expdat_rd$Type<-car::recode(expdat_rd$Type,"1='Non_D';2='Adj_D';3='Adj_P';4='rand'")
  
  expdat_rd$Type<-factor(expdat_rd$Type,levels=c("rand","Adj_D", "Adj_P", "Non_D"))
  
myseq<-seq(1,nblocks*nsets)
bp1<-myseq[phase1.end]+1

# Same slope-construction functions as in the two-phase analysis above
b1make <- function(x, bp1) ifelse(x < bp1,  bp1-x, 0) 
b2make <- function(x, bp1) ifelse(x >= bp1, x - bp1+1, 0)
  
  expdat_rd$b1<-b1make(expdat_rd$setInd,bp1)
  expdat_rd$b2<-b2make(expdat_rd$setInd,bp1)
  # NOTE(review): phase is coded numerically (1 = learning) here, whereas
  # mod.new used a factor with 'Break' as reference - confirm this matches
  # the fitted model's 'phaseLearn' contrast when applying beta below.
  expdat_rd$phase <- ifelse(expdat_rd$setInd < bp1, 1, 0)
  
  #Extract parameters for simulating data. (artificially reduce the effects of setInd and Type to see if power is working. Are the estimates reasonable)

    # beta/theta/sigma are taken directly from the fitted pilot model mod.new
    newparams_rd <- list(
    beta = getME(mod.new,"beta"), #fixed effects (intercept, Block, TPPR)
    theta = getME(mod.new, "theta"),#Random slope
    sigma = getME(mod.new, "sigma"))
    
    
  # Simulate: re.form=~0 means fresh random effects are drawn for the
  # simulated participants rather than conditioning on the fitted ones
  
  ss <- simulate(~ Type*phase + b2*Type + (0+b1|ID)+(0+b1|ID:Type), nsim = nsim, newdata = expdat_rd, newparams = newparams_rd, family=gaussian, re.form=~0, allow.new.levels=TRUE)
  

  #####################################################################
  
  
 
    
    expdat_rd$TargetRT <- ss[, 1]  # first (and only, since nsim=1) simulated response
  
         # Refit the same model to the simulated data
         new1<- lmer(TargetRT  ~ Type*phase +b2*Type + (0+b1|ID)+(0+b1|ID:Type), data = expdat_rd, REML = TRUE,control = lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
         
  summary(new1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type * phase + b2 * Type + (0 + b1 | ID) + (0 + b1 |  
##     ID:Type)
##    Data: expdat_rd
## Control: 
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
## 
## REML criterion at convergence: 84156.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2558 -0.6616  0.0060  0.6711  3.3572 
## 
## Random effects:
##  Groups   Name Variance Std.Dev.
##  ID:Type  b1      29.70   5.450 
##  ID       b1      59.31   7.701 
##  Residual      28303.96 168.238 
## Number of obs: 6400, groups:  ID:Type, 160; ID, 40
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)      609.99291   18.17176   33.57
## TypeAdj_D        -67.02826   25.69875   -2.61
## TypeAdj_P        -32.95021   25.69875   -1.28
## TypeNon_D         -3.76528   25.69875   -0.15
## phase            -18.22049   20.29025   -0.90
## b2                -0.57944    2.92864   -0.20
## TypeAdj_D:phase -311.76937   28.52770  -10.93
## TypeAdj_P:phase -235.17603   28.52770   -8.24
## TypeNon_D:phase -190.13028   28.52770   -6.66
## TypeAdj_D:b2       4.32523    4.14173    1.04
## TypeAdj_P:b2       0.95538    4.14173    0.23
## TypeNon_D:b2      -0.06686    4.14173   -0.02
## 
## Correlation of Fixed Effects:
##             (Intr) TypA_D TypA_P TypN_D phase  b2     TyA_D: TyA_P: TyN_D:
## TypeAdj_D   -0.707                                                        
## TypeAdj_P   -0.707  0.500                                                 
## TypeNon_D   -0.707  0.500  0.500                                          
## phase       -0.896  0.633  0.633  0.633                                   
## b2          -0.886  0.627  0.627  0.627  0.794                            
## TypAdj_D:ph  0.637 -0.901 -0.450 -0.450 -0.703 -0.565                     
## TypAdj_P:ph  0.637 -0.450 -0.901 -0.450 -0.703 -0.565  0.500              
## TypNn_D:phs  0.637 -0.450 -0.450 -0.901 -0.703 -0.565  0.500  0.500       
## TypAdj_D:b2  0.627 -0.886 -0.443 -0.443 -0.561 -0.707  0.799  0.399  0.399
## TypAdj_P:b2  0.627 -0.443 -0.886 -0.443 -0.561 -0.707  0.399  0.799  0.399
## TypeNn_D:b2  0.627 -0.443 -0.443 -0.886 -0.561 -0.707  0.399  0.399  0.799
##             TA_D:2 TA_P:2
## TypeAdj_D                
## TypeAdj_P                
## TypeNon_D                
## phase                    
## b2                       
## TypAdj_D:ph              
## TypAdj_P:ph              
## TypNn_D:phs              
## TypAdj_D:b2              
## TypAdj_P:b2  0.500       
## TypeNn_D:b2  0.500  0.500
  # Adequacy criterion for the Type x phase interactions (coefficients 7-9):
  # every SE must be below 30.
  # FIX: use if/else rather than scalar ifelse() - ifelse() stripped
  # print()'s invisible return, so the message was echoed twice in the
  # knitted output.
  if (any(round(sqrt(diag(vcov(new1)))[7:9], 3) > 30)) {
    print("SE: too large")
  } else {
    print("SE: OK")
  }
## [1] "SE: OK"
## [1] "SE: OK"